pacman::p_load(dplyr,readr,stringr)
rm(list=ls())
################################################################################

################################## Standardize country names and codes

df <- read_csv('../../data/trade/raw/faostat_world_land_area.csv', col_types = cols())
df <- df %>% dplyr::select(`Area Code`, Area, Continent)
colnames(df) <- c('country_code', 'faostat_country', 'continent')
df <- df %>% mutate(country = faostat_country) %>% 
  mutate(country = str_replace_all(country, c(
    'Bolivia \\(Plurinational State of\\)' = 'Bolivia',
    'Bonaire, Sint Eustatius and Saba' = 'Bonaire',
    'Brunei Darussalam' = 'Brunei',
    'Cabo Verde' = 'Cape Verde',
    'China, mainland' = 'China (mainland)',
    'China, Hong Kong SAR' = 'China (Hong Kong)',
    'China, Macao SAR' = 'China (Macao)',
    'China, Taiwan Province of' = 'Taiwan',
    'Curaçao' = 'Curacao',
    'Côte d\'Ivoire' = 'Cote d\'Ivoire',
    'Iran \\(Islamic Republic of\\)' = 'Iran',
    'Micronesia \\(Federated States of\\)' = 'Micronesia',
    "Lao People's Democratic Republic" = 'Laos',
    'Réunion' = 'Reunion',
    "Democratic People's Republic of Korea" = 'North Korea',
    'North Macedonia' = 'Macedonia',
    'Republic of Korea' = 'South Korea',
    'Republic of Moldova' = 'Moldova',
    'Russian Federation' = 'Russia',
    'Saint Barthélemy' = 'Saint Barthelemy',
    'Saint-Martin \\(French part\\)' = 'Saint-Martin',
    'Sint Maarten \\(Dutch part\\)' = 'Sint Martin',
    'Syrian Arab Republic' = 'Syria',
    'United Kingdom of Great Britain and Northern Ireland' = 'United Kingdom',
    'United Republic of Tanzania' = 'Tanzania',
    'Venezuela \\(Bolivarian Republic of\\)' = 'Venezuela',
    'Viet Nam' = 'Vietnam')))
df <- df %>% dplyr::select(-faostat_country)
write_csv(df, '../../data/trade/clean/country_codes.csv.gz')



################################## Imports from Argentina, Brazil, and ROW

#From entire world
df <- read_csv('../../data/trade/raw/faostat_imports_from_world_iv.csv', col_types = cols())
df <- df %>% rename(
  country_code = `Area Code`, year = Year, 
  destination = Area, element = Element, item = Item, 
  unit = Unit, value = Value) %>% dplyr::select(country_code, year, item, element, unit, value)
dfc <- read_csv('../../data/trade/clean/country_codes.csv.gz', col_types = cols())
df <- dfc %>% inner_join(df, by = "country_code")
df <- df %>% mutate(continent = toupper(continent),
                    destination = toupper(country),
                    item = toupper(item),
                    element = toupper(element),unit = toupper(unit))
dfm <- df %>% dplyr::select(year, destination, item, element, unit, value)
dft <- dfm %>% filter(unit == 'TONNES') %>% mutate(imports_from_world = value)
dfv <- dfm %>% filter(unit == '1000 US$') %>% mutate(imports_from_world = value * 1000,unit = 'USD')
dfw <- bind_rows(dft, dfv)
dfw <- dfw %>% dplyr::select(year, destination, item, element, unit, imports_from_world)

#From Argentina
df <- read_csv('../../data/trade/raw/faostat_imports_from_argentina_brazil_iv.csv', col_types = cols())
df <- df %>% rename(
  country_code = `Reporter Country Code`,origin = `Partner Countries`,
  year = Year,element = Element,item = Item,
  unit = Unit,value = Value) %>% dplyr::select(country_code, origin, year, item, element, unit, value)
dfc <- read_csv('../../data/trade/clean/country_codes.csv.gz', col_types = cols())
df <- dfc %>% inner_join(df, by = "country_code")
df <- df %>%
  mutate(continent = toupper(continent),origin = toupper(origin),
         destination = toupper(country),item = toupper(item),
         element = toupper(element),unit = toupper(unit))
dfm <- df %>% dplyr::select(year, origin, destination, item, element, unit, value)
dfa <- dfm %>% filter(origin == 'ARGENTINA')
dft <- dfa %>% filter(unit == 'TONNES') %>% mutate(imports_from_argentina = value)
dfv <- dfa %>% filter(unit == '1000 US$') %>% mutate(
  imports_from_argentina = value * 1000,
  unit = 'USD')
dfa <- bind_rows(dft, dfv) %>% dplyr::select(year, destination, item, element, unit, imports_from_argentina)

#From Brazil
dfb <- dfm %>% filter(origin == 'BRAZIL')
dft <- dfb %>% filter(unit == 'TONNES') %>% mutate(imports_from_brazil = value)
dfv <- dfb %>% filter(unit == '1000 US$') %>%
  mutate(imports_from_brazil = value * 1000,
         unit = 'USD')
dfb <- bind_rows(dft, dfv) %>% dplyr::select(year, destination, item, element, unit, imports_from_brazil)

#Merge all
df <- dfw %>%
  left_join(dfa, by = c('year', 'destination', 'item', 'element', 'unit')) %>%
  left_join(dfb, by = c('year', 'destination', 'item', 'element', 'unit'))
df <- df %>%
  mutate(imports_from_world = if_else(is.na(imports_from_world), 0, imports_from_world),
         imports_from_argentina = if_else(is.na(imports_from_argentina), 0, imports_from_argentina),
         imports_from_brazil = if_else(is.na(imports_from_brazil), 0, imports_from_brazil))
df <- df %>% mutate(residual_imports_from_world = imports_from_world - imports_from_brazil - imports_from_argentina)
df <- df %>% filter(residual_imports_from_world >= 0)
write_csv(df, '../../data/trade/clean/imports_from_world_iv.csv.gz')



################################## Temperature changes

df <- read_csv('../../data/trade/raw/faostat_temperature_change.csv', col_types = cols())
df <- df %>% rename( country_code = `Area Code`,year = Year,country = Area, element = Element,months = Months,unit = Unit,value = Value) %>%
  dplyr::select(country_code, year, element, months, unit, value)
dfc <- read_csv('../../data/trade/clean/country_codes.csv.gz', col_types = cols())
df <- dfc %>% inner_join(df, by = "country_code")
write_csv(df, '../../data/trade/clean/world_temperature_change.csv.gz')



################################## Trade flows

dfc <- read_csv('../../data/trade/clean/country_codes.csv.gz', col_types = cols()) %>% rename(`Area Code` = country_code)
df1 <- read_csv('../../data/trade/raw/all_importers_1991_2000.csv', col_types = cols())
df2 <- read_csv('../../data/trade/raw/all_importers_2001_2010.csv', col_types = cols())
df3 <- read_csv('../../data/trade/raw/all_importers_2011_2018.csv', col_types = cols())
df <- bind_rows(df1, df2, df3)
df <- dfc %>% inner_join(df, by = "Area Code")
df <- df %>%  mutate(Area = country)
df <- df %>% dplyr::select(-country)
write_csv(df, '../../data/trade/clean/all_importers_faostat.csv.gz')

##### Define set of major importers of South American commodities
N <- 10
min_share <- 0.05
df <- read_csv('../../data/trade/clean/all_importers_faostat.csv.gz', col_types = cols())
df <- dfc %>% inner_join(df, by = 'Area Code')
df <- df %>% mutate(Area = country) %>% dplyr::select(-country)
dfm <- df %>% group_by(Area, Item) %>% summarise(Value = sum(Value, na.rm = TRUE), .groups = 'drop')
dfl <- tibble()
for (item in unique(dfm$Item)) {
  df_item <- dfm %>%
    filter(Item == item) %>%
    mutate(share = Value / sum(Value, na.rm = TRUE)) %>%
    filter(share > min_share) %>%
    arrange(desc(share)) %>%
    slice_head(n = N) %>%
    dplyr::select(Area)
  dfl <- bind_rows(dfl, df_item)
}
dfl <- dfl %>% distinct(Area, .keep_all = TRUE) %>% arrange(Area)
write_csv(dfl, '../../data/trade/clean/major_importers_faostat.csv.gz')

##### Define set of major producers
df <- read_csv('../../data/trade/clean/country_codes.csv.gz', col_types = cols())
dfc <- df %>% rename(`Area Code` = country_code)

#Processed crops
N <- 10
min_share <- 0.05
df <- read_csv('../../data/trade/raw/all_producers_processedcrop_production.csv', col_types = cols())
df <- dfc %>% inner_join(df, by = "Area Code")
df <- df %>% mutate(Area = country) %>%dplyr::select(-country)
dfm <- df %>% group_by(Area, Item) %>% summarise(Value = sum(Value, na.rm = TRUE), .groups = 'drop')
dfl <- tibble()
for (item in unique(dfm$Item)) {
  df_item <- dfm %>%
    filter(Item == item) %>%
    mutate(share = Value / sum(Value, na.rm = TRUE)) %>%
    filter(share > min_share) %>%
    arrange(desc(share)) %>%
    slice_head(n = N) %>%
    dplyr::select(Area)
  dfl <- bind_rows(dfl, df_item)
}
dfl_pcrops <- dfl %>% distinct(Area, .keep_all = TRUE)

#Crops
N <- 10
min_share <- 0.05
df <- read_csv('../../data/trade/raw/all_producers_crop_production.csv', col_types = cols())
df <- dfc %>% inner_join(df, by = "Area Code")
df <- df %>% mutate(Area = country) %>% dplyr::select(-country)
dfm <- df %>%  group_by(Area, Item) %>% summarise(Value = sum(Value, na.rm = TRUE), .groups = 'drop')
dfl <- tibble()
for (item in unique(dfm$Item)) {
  df_item <- dfm %>%
    filter(Item == item) %>%
    mutate(share = Value / sum(Value, na.rm = TRUE)) %>%
    filter(share > min_share) %>%
    arrange(desc(share)) %>%
    slice_head(n = N) %>%
    dplyr::select(Area)
  dfl <- bind_rows(dfl, df_item)
}
dfl_crops <- dfl %>%  distinct(Area, .keep_all = TRUE)

#Meat
N <- 10
min_share <- 0.05
df <- read_csv('../../data/trade/raw/all_producers_meat_production.csv', col_types = cols())
df <- dfc %>% inner_join(df, by = "Area Code")
df <- df %>%  mutate(Area = country) %>%  dplyr::select(-country)
dfm <- df %>%  group_by(Area, Item) %>%  summarise(Value = sum(Value, na.rm = TRUE), .groups = 'drop')
dfl <- tibble()
for (item in unique(dfm$Item)) {
  df_item <- dfm %>%
    filter(Item == item) %>%
    mutate(share = Value / sum(Value, na.rm = TRUE)) %>%
    filter(share > min_share) %>%
    arrange(desc(share)) %>%
    slice_head(n = N) %>%
    dplyr::select(Area)
  dfl <- bind_rows(dfl, df_item)
}
dfl_meat <- dfl %>% distinct(Area, .keep_all = TRUE)

#Append all
df <- bind_rows(dfl_pcrops, dfl_crops)
df <- bind_rows(df, dfl_meat)
df <- df %>%  distinct(Area, .keep_all = TRUE)
df <- df %>% arrange(Area)
write_csv(df, '../../data/trade/clean/major_producers_faostat.csv.gz')


##### Trade flows between major producer nations and major importer nations
df1 <- read_csv('../../data/trade/raw/tradeflows_1990_1995.csv', col_types = cols())
df2 <- read_csv('../../data/trade/raw/tradeflows_1996_2000.csv', col_types = cols())
df3 <- read_csv('../../data/trade/raw/tradeflows_2001_2005.csv', col_types = cols())
df4 <- read_csv('../../data/trade/raw/tradeflows_2006_2010.csv', col_types = cols())
df5 <- read_csv('../../data/trade/raw/tradeflows_2011_2015.csv', col_types = cols())
df6 <- read_csv('../../data/trade/raw/tradeflows_2016_2018.csv', col_types = cols())

df <- bind_rows(df1, df2, df3, df4, df5, df6)
dfc <- read_csv('../../data/trade/clean/country_codes.csv.gz', col_types = cols()) %>%
  rename(`Reporter Country Code` = country_code)
df <- dfc %>%  inner_join(df, by = "Reporter Country Code")
df <- df %>% mutate(`Reporter Countries` = country) %>%  dplyr::select(-country, -continent)
dfc <- read_csv('../../data/trade/clean/country_codes.csv.gz', col_types = cols()) %>% rename(`Partner Country Code` = country_code)
df <- dfc %>%  inner_join(df, by = "Partner Country Code")
df <- df %>% mutate(`Partner Countries` = country) %>% dplyr::select(-country)
dfv <- df %>% filter(Element == "Import Value")
dfq <- df %>% filter(Element == "Import Quantity")

#Values
df <- dfv %>%
  dplyr::select(`Reporter Countries`, `Partner Countries`, Item, Year, Value) %>%
  rename(destination_id = `Reporter Countries`,origin_id = `Partner Countries`,
         product = Item,year = Year,X = Value)
dfjc <- df %>% group_by(destination_id, product, year) %>% summarise(X = sum(X, na.rm = TRUE), .groups = 'drop')
df_v <- df %>%  inner_join(dfjc, by = c("year", "destination_id", "product"), suffix = c("_njt", "_jt"))

#Quantities
df <- dfq %>%
  dplyr::select(`Reporter Countries`, `Partner Countries`, Item, Year, Value) %>%
  rename(destination_id = `Reporter Countries`,origin_id = `Partner Countries`,
         product = Item,year = Year,Q = Value)
dfjc <- df %>% group_by(destination_id, product, year) %>% summarise(Q = sum(Q, na.rm = TRUE), .groups = 'drop')
df_q <- df %>% inner_join(dfjc, by = c("year", "destination_id", "product"), suffix = c("_njt", "_jt"))

#Merge
df <- df_v %>% inner_join(df_q, by = c("year", "destination_id", "origin_id", "product"))
write_csv(df, '../../data/trade/clean/tradeflows_faostat.csv.gz')


##### Clean producer prices

df <- read_csv('../../data/trade/raw/world_producer_prices.csv', col_types = cols())
df <- df %>% rename(country_code = `Area Code`,year = Year,country = Area,
    element = Element,item = Item,unit = Unit, value = Value ) %>%
  dplyr::select(country_code, year, item, element, unit, value)
dfc <- read_csv('../../data/trade/clean/country_codes.csv.gz', col_types = cols())
df <- dfc %>% inner_join(df, by = "country_code")
df <- df %>% dplyr::select(year, country, item, value) %>% rename(origin_id = country,product = item,p_ntc = value)
write_csv(df, '../../data/trade/clean/world_producer_prices.csv.gz')


################################## Emissions
df <- read_csv('../../data/environmental/raw/FAOSTAT_data_emissions.csv', col_types = cols())
df <- df %>% rename(year = Year,nation = Area,element = Element,stage = Item,unit = Unit,value = Value)
df <- df %>% mutate(value_t = value * 1000,value_million_t = value_t / 1000000)
df <- df %>% dplyr::select(year, nation, element, stage, value_million_t)
df <- df %>% filter(stage %in% c('Farm-gate emissions', 'Land Use change', 'Emissions on agricultural land'))
write_csv(df, '../../data/environmental/clean/FAOSTAT_data_emissions.csv.gz')


